
**********
* Readme *
**********

* This script reweights each observation from PNAD (by region and by quarter) so that the characteristics of the PNAD sample match the population characteristics implied by POF.


* Root folder (PATH TO BE DEFINED BY THE USER)
**********************************************
clear all
global analysis "C:/***/replication_package"


* Timestamped log
*****************
* Close any unnamed log left open by an earlier (possibly aborted) run.
* Without this, -log using- below stops with r(604) "log file already open",
* because the only other -log close- sits on the very last line of the script.
capture log close

* Make sure the log folder exists (silently does nothing if it already does).
capture mkdir "${analysis}/code/logs"

* Today's date as YYMMDD, used as a suffix in the log file name
global today = strofreal(date(c(current_date), "DMY"), "%tdYYNNDD")
log using "${analysis}/code/logs/2_4_reweight_pnad_${today}.smcl", replace


***************
* Import data *
***************

* Load the cleaned PNAD sample and stack the cleaned POF sample underneath it.
* -generate(pof)- creates the source flag: 0 for PNAD rows, 1 for POF rows.
use "${analysis}/data/2_3_pnad_clean.dta", clear
append using "${analysis}/data/2_2_pof_clean.dta", generate(pof)

* Variables whose moments will be matched.
* ref_groups holds the omitted reference categories (per the original note:
* non-white female, age 14-24, no education; that note also mentions Sao Paulo,
* although no region variable is dropped here).
unab ref_groups : rg_1_nonwhite_female age_1 educ_1
unab covariates_all : rg_* age_* educ_*

* Final regressor list = every rg_*/age_*/educ_* variable minus the reference categories
global covariates : list covariates_all - ref_groups

* Retain only what the reweighting needs, in a fixed column order
local keepvars pof pweight date_string region $covariates int_id
keep  `keepvars'
order `keepvars'

* Cell counts by region and quarter (PNAD and POF rows pooled)
tabulate region date_string

* Drop any existing snapshots, then store the pooled data; the loops further
* down reload it with -snapshot restore 1-.
snapshot erase _all
snapshot save


*****************************************************************
* Reweight PNAD to match POF moments (by quarter and by region) *
*****************************************************************

* The loop cells are defined by the PNAD rows only (pof == 0). POF rows are
* pooled into every period by the -keep if- below, so a date_string or region
* level that exists only in POF would produce a cell without a single PNAD
* observation to reweight, and -ebalance- would stop there.
levelsof date_string if pof == 0, local(periods)
levelsof region if pof == 0, local(regions)

foreach period of local periods {
  display "* * * * * * * * *"
  display "*               *"
  display "*     `period'    *"
  display "*               *"
  display "* * * * * * * * *"
  
  foreach region of local regions {
    display "* * * * * * * * * * * * * *"
    display "*                         *"
    display "*     `period'" " and " "`region'     *"
    display "*                         *"
    display "* * * * * * * * * * * * * *"

    * Keep period i and region j: the PNAD rows of this quarter plus all POF
    * rows, both restricted to the current region
    snapshot restore 1
    keep if (date_string == "`period'" | pof == 1) & region == `region'
  
    * Reweight the pof == 0 (PNAD) rows so that their covariate moments match
    * those of the pof == 1 (POF) rows. targets(1) requests first moments,
    * basewt(pweight) starts from the survey weights, and wttreat applies the
    * base weights to the POF rows as well (see the ebalance help file).
    * The resulting weights are stored in new_pweight.
    ebalance pof $covariates, targets(1) basewt(pweight) wttreat generate(new_pweight) maxiter(80)

    * Save the reweighted PNAD rows of this cell to a temporary file
    keep if pof == 0
    save "${analysis}/data/temp_`period'_`region'.dta", replace

  }
}

*****************************************************
* Append all regions and all quarters back together *
*****************************************************

* Rebuild the same period/region lists from the snapshot (same pof == 0
* restriction as above, so that exactly the files written by the loop are
* read back), then empty memory before appending.
snapshot restore 1
levelsof date_string if pof == 0, local(periods) 
levelsof region if pof == 0, local(regions)
clear

* Append each file + housekeeping
foreach period of local periods {
  display "* * * * * * * * *"
  display "*               *"
  display "*     `period'    *"
  display "*               *"
  display "* * * * * * * * *"

  foreach region of local regions {
    display "* * * * * * * * * * * * * *"
    display "*                         *"
    display "*     `period'" " and " "`region'     *"
    display "*                         *"
    display "* * * * * * * * * * * * * *"

    * Read the cell back in and delete its temporary file right away
    append using "${analysis}/data/temp_`period'_`region'.dta"
    erase        "${analysis}/data/temp_`period'_`region'.dta"
  }
}

* Cell counts of the reassembled PNAD sample
tab region date_string
snapshot erase _all

* Save master key and new weights
* new_fweight is the integer-rounded new weight (presumably for commands that
* require frequency weights -- confirm against downstream scripts)
gen new_fweight = round(new_pweight)

keep int_id new_pweight new_fweight
save "${analysis}/data/2_4_pnad_new_pweight.dta", replace


***************************************************
* Merge the new weights into the big pnad dataset *
***************************************************

* Reload the full cleaned PNAD file and attach the new weights through the
* int_id key. -nogenerate- suppresses the _merge indicator, so unmatched rows
* are kept without being flagged.
use "${analysis}/data/2_3_pnad_clean.dta", clear
merge 1:1 int_id using "${analysis}/data/2_4_pnad_new_pweight.dta", nogenerate

* Integer-rounded copy of the original survey weight
generate fweight = round(pweight)

* Identifiers and the four weight variables first, then write the final file
order survey state region strata_id psu_id pweight fweight new_pweight new_fweight
save "${analysis}/data/2_4_pnad_clean.dta", replace

* Housekeeping: delete the intermediate weights file and the input file.
* NOTE: 2_3_pnad_clean.dta is read at the top of this script, so once it is
* erased the script cannot be re-run until that file has been recreated.
foreach stale in 2_4_pnad_new_pweight 2_3_pnad_clean {
  erase "${analysis}/data/`stale'.dta"
}


* End of script
***************
capture log close